library(dplyr)
library(lfe)
library(sjPlot)
library(stargazer)
library(ggplot2)
library(lubridate)
library(coefplot)
library(modelsummary)

### Adjust the working directory / file locations before running.
# Placeholder: put the path of the folder holding the input CSV files between
# the quotes. The read.csv(), ggsave() and stargazer(out = ...) calls in this
# script use bare file names, so they are resolved against this directory.
setwd("")

#############################################################################
# ---- Input data ----

# Coded tweets (presumably one row per tweet -- TODO confirm). The analysis
# below reads the Negative_* and ideo_* coding columns together with
# DaysBefore, Party and Candidate.
PrePrimary2 <- read.csv(file = "ResPolPrePrimaryTweetsFile.csv")

# Candidate covariates. Merged onto the per-candidate summaries by `Candidate`
# and the source of the regressors used in the models further down.
cands <- read.csv(file = "cands2022trimmed.csv")

#############################################################################
##### Negativity Plot
# Overall mean of each negativity coding (hand, GPT pass 1, GPT pass 2,
# reconciled). These are later multiplied by 100 and reported as percentages,
# so the columns are presumably 0/1 indicators -- TODO confirm.
mean(PrePrimary2[["Negative_Hand"]])
mean(PrePrimary2[["Negative_Code1"]])
mean(PrePrimary2[["Negative_Code2"]])
mean(PrePrimary2[["Negative_Recon"]])

# Mean ideology score per party under each coding.
ideoavg <- ungroup(
  dplyr::summarize(
    group_by(PrePrimary2, Party),
    hand = mean(ideo_hand),
    code1 = mean(ideo_code1),
    code2 = mean(ideo_code2),
    recon = mean(ideo_recon)
  )
)

# Overall mean ideology score under each coding, pooling both parties.
mean(PrePrimary2[["ideo_hand"]])
mean(PrePrimary2[["ideo_code1"]])
mean(PrePrimary2[["ideo_code2"]])
mean(PrePrimary2[["ideo_recon"]])
####

# ---- Negativity trend plot ----
# For every value of DaysBefore, the mean of each negativity coding times 100
# (plotted as the percentage of that day's tweets coded negative).
tweets_by_day <- group_by(PrePrimary2, DaysBefore)

trendshand <- ungroup(
  dplyr::summarize(tweets_by_day, Negative = mean(Negative_Hand) * 100)
)
trendscode1 <- ungroup(
  dplyr::summarize(tweets_by_day, Negative = mean(Negative_Code1) * 100)
)
trendscode2 <- ungroup(
  dplyr::summarize(tweets_by_day, Negative = mean(Negative_Code2) * 100)
)
trendsrecon <- ungroup(
  dplyr::summarize(tweets_by_day, Negative = mean(Negative_Recon) * 100)
)

# Tag each series with a short scheme code, then stack them into long form.
trendshand <- mutate(trendshand, Model = "HandCode")
trendscode1 <- mutate(trendscode1, Model = "GPTCode1")
trendscode2 <- mutate(trendscode2, Model = "GPTCode2")
trendsrecon <- mutate(trendsrecon, Model = "Hybrid")

trends <- rbind(trendshand, trendscode1, trendscode2, trendsrecon)

# Map the short codes to display labels in one step; the level order fixes
# the order of the facets.
trends[["Model"]] <- factor(
  trends[["Model"]],
  levels = c("HandCode", "GPTCode1", "GPTCode2", "Hybrid"),
  labels = c(
    "Manual Coding",
    "GPT Coding 1",
    "GPT Coding 2",
    "Hybrid GPT-Human Coding"
  )
)

# One panel per coding scheme; the dotted vertical line marks DaysBefore = 0.
ggplot(data = trends, mapping = aes(x = DaysBefore, y = Negative)) +
  geom_point(size = 0.75) +
  geom_line(size = 0.75) +
  geom_vline(
    xintercept = 0,
    linetype = "dotted",
    color = "black",
    size = 1
  ) +
  facet_wrap(~Model) +
  labs(
    x = "Days Before Primary Election",
    y = "Percentage of Daily Negative Tweets"
  ) +
  ylim(0, 40) +
  theme_bw()

# No plot object is passed, so ggsave() writes the most recent ggplot.
ggsave(filename = "PrePrimaryNegativePlot.png")

####################################################################
####Negativity Models

# ---- Candidate-level negativity models ----
# Per candidate: number of tweets and number coded negative under each scheme.
candavgs <- PrePrimary2 %>%
  group_by(Candidate) %>%
  dplyr::summarize(
    tweets = n(),
    Negative_Hand = sum(Negative_Hand),
    Negative_Code1 = sum(Negative_Code1),
    Negative_Code2 = sum(Negative_Code2),
    Negative_Recon = sum(Negative_Recon)
  ) %>%
  ungroup()

# One covariate row per candidate: the first occurrence in `cands` is kept.
candtrim <- cands[!duplicated(cands$Candidate), ]

# merge() defaults to an inner join, so candidates missing from either table
# are dropped here.
candavgs <- merge(candavgs, candtrim, by = "Candidate")

# Negative tweets as a percentage of each candidate's tweets.
candavgs$Negative_Hand_Pct <- (candavgs$Negative_Hand / candavgs$tweets) * 100
candavgs$Negative_Code1_Pct <- (candavgs$Negative_Code1 / candavgs$tweets) * 100
candavgs$Negative_Code2_Pct <- (candavgs$Negative_Code2 / candavgs$tweets) * 100
candavgs$Negative_Recon_Pct <- (candavgs$Negative_Recon / candavgs$tweets) * 100

# Keep only candidates with at least 10 tweets.
candavgs <- dplyr::filter(candavgs, tweets > 9)

# Standardise the continuous regressors (mean 0, sd 1) on the filtered sample.
# scale() returns a one-column matrix carrying centring/scaling attributes;
# as.numeric() stores a plain numeric vector instead, so the data frame has no
# matrix columns and the fitted models record ordinary numeric terms (matrix
# terms are a common cause of type-mismatch errors in predict() and in
# marginal-effects helpers). Coefficient names and estimates are unaffected.
candavgs$tweets <- as.numeric(scale(candavgs$tweets))
candavgs$PVIParty <- as.numeric(scale(candavgs$PVIParty))

# Same specification for every coding scheme; only the outcome differs.
handmodel1 <- lm(
  Negative_Hand_Pct ~ Chamber + Incumbent + Experience + Gender + Party +
    PVIParty + tweets,
  data = candavgs
)
code1model1 <- lm(
  Negative_Code1_Pct ~ Chamber + Incumbent + Experience + Gender + Party +
    PVIParty + tweets,
  data = candavgs
)
code2model1 <- lm(
  Negative_Code2_Pct ~ Chamber + Incumbent + Experience + Gender + Party +
    PVIParty + tweets,
  data = candavgs
)
reconmodel1 <- lm(
  Negative_Recon_Pct ~ Chamber + Incumbent + Experience + Gender + Party +
    PVIParty + tweets,
  data = candavgs
)

# Side-by-side regression table written as HTML.
stargazer(
  handmodel1, code1model1, code2model1, reconmodel1,
  type = "html",
  out = "PrimaryNegativityModels.htm"
)

################ ideology

# ---- Ideology trend plot ----
# Mean ideology score for every DaysBefore x Party cell, once per coding scheme.
tweets_by_day_party <- group_by(PrePrimary2, DaysBefore, Party)

trendshand <- ungroup(
  dplyr::summarize(tweets_by_day_party, Ideology = mean(ideo_hand))
)
trendscode1 <- ungroup(
  dplyr::summarize(tweets_by_day_party, Ideology = mean(ideo_code1))
)
trendscode2 <- ungroup(
  dplyr::summarize(tweets_by_day_party, Ideology = mean(ideo_code2))
)
trendsrecon <- ungroup(
  dplyr::summarize(tweets_by_day_party, Ideology = mean(ideo_recon))
)

# Tag each series with a short scheme code, then stack them into long form.
trendshand <- mutate(trendshand, Model = "HandCode")
trendscode1 <- mutate(trendscode1, Model = "GPTCode1")
trendscode2 <- mutate(trendscode2, Model = "GPTCode2")
trendsrecon <- mutate(trendsrecon, Model = "Hybrid")

trends <- rbind(trendshand, trendscode1, trendscode2, trendsrecon)

# Map the short codes to display labels in one step; the level order fixes
# the order of the facets.
trends[["Model"]] <- factor(
  trends[["Model"]],
  levels = c("HandCode", "GPTCode1", "GPTCode2", "Hybrid"),
  labels = c(
    "Manual Coding",
    "GPT Coding 1",
    "GPT Coding 2",
    "Hybrid GPT-Human Coding"
  )
)

# Party is treated as discrete so the manual colour scale can be applied.
trends[["Party"]] <- as.factor(trends[["Party"]])

# One panel per coding scheme, one line per party. The two greys are assigned
# to the Party levels in level order (the scale supplies exactly two colours).
ggplot(
  data = trends,
  mapping = aes(x = DaysBefore, y = Ideology, color = Party)
) +
  geom_point(size = 0.75) +
  geom_line(size = 0.75) +
  scale_color_manual(values = c("grey20", "grey65")) +
  geom_vline(
    xintercept = 0,
    linetype = "dotted",
    color = "black",
    size = 1
  ) +
  facet_wrap(~Model) +
  labs(
    x = "Days Before Primary Election",
    y = "Average Ideology of Party Candidate Tweets"
  ) +
  ylim(0.5, 1.5) +
  theme_bw()

# No plot object is passed, so ggsave() writes the most recent ggplot.
ggsave(filename = "PrePrimaryIdeologyPlot.png")


########Ideology Models

# ---- Candidate-level ideology models ----
# Per candidate: number of tweets and mean ideology score under each scheme.
candavgs <- PrePrimary2 %>%
  group_by(Candidate) %>%
  dplyr::summarize(
    tweets = n(),
    Ideology_Hand = mean(ideo_hand),
    Ideology_Code1 = mean(ideo_code1),
    Ideology_Code2 = mean(ideo_code2),
    Ideology_Recon = mean(ideo_recon)
  ) %>%
  ungroup()

# One covariate row per candidate: the first occurrence in `cands` is kept.
candtrim <- cands[!duplicated(cands$Candidate), ]

# merge() defaults to an inner join, so candidates missing from either table
# are dropped here.
candavgs <- merge(candavgs, candtrim, by = "Candidate")

# Keep only candidates with at least 10 tweets.
candavgs <- dplyr::filter(candavgs, tweets > 9)

# Standardise the continuous regressors (mean 0, sd 1) on the filtered sample.
# scale() returns a one-column matrix carrying centring/scaling attributes;
# as.numeric() stores a plain numeric vector instead, so the data frame has no
# matrix columns and the fitted models record ordinary numeric terms (matrix
# terms are a common cause of type-mismatch errors in predict() and in
# marginal-effects helpers). Coefficient names and estimates are unaffected.
candavgs$tweets <- as.numeric(scale(candavgs$tweets))
candavgs$PVIParty <- as.numeric(scale(candavgs$PVIParty))

# Same specification for every coding scheme; only the outcome differs.
handmodel2 <- lm(
  Ideology_Hand ~ Chamber + Incumbent + Experience + Gender + Party +
    PVIParty + tweets,
  data = candavgs
)
code1model2 <- lm(
  Ideology_Code1 ~ Chamber + Incumbent + Experience + Gender + Party +
    PVIParty + tweets,
  data = candavgs
)
code2model2 <- lm(
  Ideology_Code2 ~ Chamber + Incumbent + Experience + Gender + Party +
    PVIParty + tweets,
  data = candavgs
)
reconmodel2 <- lm(
  Ideology_Recon ~ Chamber + Incumbent + Experience + Gender + Party +
    PVIParty + tweets,
  data = candavgs
)

# Side-by-side regression table written as HTML.
stargazer(
  handmodel2, code1model2, code2model2, reconmodel2,
  type = "html",
  out = "PrimaryIdeologyModels.htm"
)

###################################################
######################Combined Model Plots

# Shared settings for the two coefficient plots.
# Display names for selected coefficients; terms not listed keep their names.
coef_labels <- c(
  tweets = "Total Tweets",
  PVIParty = "Electoral Safety",
  PartyR = "Republican",
  ChamberSenate = "Senate"
)
# Legend labels and greys are matched to the models by position on the colour
# scale. NOTE(review): the GPT 1 / GPT 2 / hand / hybrid order looks like it
# assumes the models are ordered alphabetically by object name (code1*, code2*,
# hand*, recon*) -- confirm against the rendered legend.
model_labels <- c("GPT Round 1", "GPT Round 2", "Hand Coded", "Hybrid")
model_greys <- c("grey62", "grey45", "grey25", "grey1")

# Negativity models: coefficients from all four codings on one plot
# (intercept omitted, inner confidence band switched off).
multiplot(
  handmodel1, code1model1, code2model1, reconmodel1,
  innerCI = 0,
  intercept = FALSE,
  title = "Models: Percentage of Pre-Primary Candidate Negative Tweets",
  sort = "alphabetical",
  newNames = coef_labels
) +
  scale_color_manual(labels = model_labels, values = model_greys) +
  ylab("Variable") +
  xlab("Coefficient Estimate") +
  theme_bw()
# No plot object is passed, so ggsave() writes the most recent ggplot.
ggsave("NegativeCoefPlot.png")

# Ideology models: same layout as above.
multiplot(
  handmodel2, code1model2, code2model2, reconmodel2,
  innerCI = 0,
  intercept = FALSE,
  title = "Models: Average Pre-Primary Candidate Tweet Ideology",
  sort = "alphabetical",
  newNames = coef_labels
) +
  scale_color_manual(labels = model_labels, values = model_greys) +
  ylab("Variable") +
  xlab("Coefficient Estimate") +
  theme_bw()
ggsave("IdeologyCoefPlot.png")



